Sauerkraut study - Regression-based Screening on species’ abundance

Code
library(sauerkrautTaxonomyBuddy)
library(SummarizedExperiment) # microbiome analysis
library(mia)                  # microbiome analysis
library(ggtree)               # plot phylogenetic trees

library(readxl)     # read Excel files
library(mgcv)       # regression model
library(NBZIMM)     # zero-inflated Gaussian mixed regression models
library(parallel)   # parallelize processes on multiple cores

library(dplyr)      # data handling
library(tidyr)      # data transformation
library(ggplot2)    # data visualization
library(patchwork)  # grid of ggplot2
library(ggpubr)     # further ggplot2 grid customizations
library(kableExtra) # print tables

# set ggplot2 theme
theme_set(
  theme_minimal() +
    theme(plot.title       = element_text(hjust = 0.5),
          plot.subtitle    = element_text(hjust = 0.5),
          panel.grid.minor = element_blank(),
          plot.background  = element_rect(fill = "white", color = "white"))
)

# suppress scientific notation (= e notation of decimal numbers)
options(scipen = 999)

Data preparation

Code
define_dataPaths()
Code
tse <- readAndPrepare_mainTaxonomicData(path_groupInfoRdata               = path_groupInfoRdata,
                                        path_participantRdata             = path_participantRdata,
                                        path_samplesIDxlsx                = path_samplesIDxlsx,
                                        path_dietInfoCsv                  = path_dietInfoCsv,
                                        path_nutrientInfoCsv              = path_nutrientInfoCsv,
                                        path_bloodMetabolomeXlsx          = path_bloodMetabolomeXlsx,
                                        path_stoolMetabolomeXlsx          = path_stoolMetabolomeXlsx,
                                        path_T4questXlsx                  = path_T4questXlsx,
                                        path_stoolInfoXlsx                = path_stoolInfoXlsx,
                                        path_bodyMeasuresXlsx             = path_bodyMeasuresXlsx,
                                        path_absTaxonomyData              = path_absTaxonomyData,
                                        path_relTaxonomyData              = path_relTaxonomyData,
                                        path_sampleLookupXlsx             = path_sampleLookupXlsx,
                                        path_krautLookupXlsx              = path_krautLookupXlsx,
                                        path_bloodMarkerBostonRdata       = path_bloodMarkerBostonRdata,
                                        path_bloodMarkerZentrallaborRdata = path_bloodMarkerZentrallaborRdata,
                                        path_bloodMarkerBostonXlsx        = path_bloodMarkerBostonXlsx,
                                        path_bloodMarkerZentrallaborXlsx  = path_bloodMarkerZentrallaborXlsx,
                                        aggregation_level                 = "species",
                                        exclude_rareStoolBacteria         = TRUE,
                                        exclude_sickParticipants          = "taxonomy paper")

# create a joint table with all relevant information
dat <- tse %>% mia::meltAssay(assay.type = "relAbd", add_row_data = TRUE, add_col_data = TRUE)
Code
# save the average T1/T3 baseline Shannon diversity as individual variable
dat_baselineDiv <- dat %>% 
  filter(timepoint %in% c("T1", "T3")) %>% 
  group_by(participant_id) %>% 
  summarize(baseline_div = mean(diversity_shannon, na.rm = TRUE))
dat <- dat %>% 
  dplyr::left_join(dat_baselineDiv, by = "participant_id")

# prepare data for regression models
median_baselineDiv <- median(dat_baselineDiv$baseline_div)
dat <- dat %>% 
  filter(intervention != "FollowUp") %>% 
  mutate(relAbd_log     = log(relAbd + 1),
         intervention   = case_when(treatment == "Baseline" ~ "Baseline",
                                    TRUE                    ~ intervention),
         age_centered50 = age - 50,
         bmi_centered25 = bmi_t0 - 25,
         age_cat        = case_when(age < 50 ~ paste0(min(age), "-49"),
                                    TRUE     ~ paste0("50-", max(age))),
         age_cat        = factor(age_cat),
         bmi_cat        = case_when(bmi_t0 < 25 ~ paste0("[", floor(min(bmi_t0)), ", 25)"),
                                    TRUE        ~ paste0("[25, ", ceiling(max(bmi_t0)), "]")),
         bmi_cat        = factor(bmi_cat),
         phase          = case_when(timepoint %in% c("T3","T4") ~ "phase 2",
                                    TRUE                        ~ "phase 1"),
         phase          = factor(phase),
         dailyFiber_cat  = case_when(dailyConsumption_fiber >= 30 ~ ">= 30g",
                                     TRUE                         ~ "< 30g"),
         dailyFiber_cat  = factor(dailyFiber_cat),
         baselineDiv_cat = case_when(baseline_div < median_baselineDiv ~ "[2.8, 3.85)",
                                     TRUE                              ~ "[3.85, 4.4)"),
         baselineDiv_cat = factor(baselineDiv_cat))

Run screening

Run screening on 362 not-low-abundance species.

For each species, a Generalized Mixed Regression Model is estimated, identical to the models estimated for the blood measurements for the blood paper, with response parameter the relative abundance.

Model selection is performed by estimating either estimating (A) a Gaussian model, and (B) a Gaussian model on ‘log(y + 1)’ transformed values, or (A) a zero-inflated Gaussian model (with only an intercept predictor for the ZI part, as a general excess zero correction), and (B) a zero-inflated Gaussian model on ‘log(y + 1)’ transformed values for each gene and selecting the best performing one of the respective two as the final model. If at least 10% of a gene’s observations are zero values then the zero-inflated models are estimated, otherwise the non-zero-inflated models are estimated.

Notes:

  • For the non-zero-inflated models the best model is chosen based on the share of explained deviance
  • For the zero-inflated models the best model is chosen based on the lower average of all absolute residuals, since these models are estimated based on a quasi-likelihood approach and the deviance is not available.

Unused alternatives

  • a zero-inflated Poisson model could have been estimated on rounded values. However, a Poisson distribution is not able to account for more strongly right-skewed distributions with a stronger probability mass at zero
  • a Tweedie (p = 1.5) distribution would be a right-skewed continuous distribution with also potential probability mass at zero. However, since (1) it’s not a proper established distribution and (2) the used log transformation already accounts for right-skewedness we don’t need it and don’t use it

Correction for multiple testing is performed by applying the Benjamini-Hochberg (FDR) method to all interventions’ p-values.

Additionally, the intervention effects for every model are estimated in separate interactions with gender (male or female), age (above or below 50), baseline BMI (above or below 25), daily fiber consumption (above or below 30), baseline Shannon diversity (above or below tbd). Independently for every interaction variable (age, sex, BMI, fiber, diversity) a multiple testing correction is applied.

Note: For these screening analyses we use a significance threshold of 0.1 for q-values and 0.05 for uncorrected p-values.

The models are estimated with mgcv::gam() and NBZIMM::lme.zig().

Code
# helper function to estimate the models
# - dat_model:     Dataset used for model estimation
# - model_type:    One of c("Gaussian", "ZI Gaussian")
# - model_formula: A model formula created with "as.formula"
estimate_model <- function(dat_model, model_type, model_formula) {
  
  checkmate::assert_data_frame(dat_model)
  checkmate::assert_choice(model_type, choices = c("Gaussian", "ZI Gaussian"))
  checkmate::assert_class(model_formula, classes = "formula")
  
  
  if (model_type == "Gaussian") {
    model <- gam(model_formula, method = "REML", family = "gaussian", data = dat_model)
    
  } else { # model_type = "ZI Gaussian"
    model <- tryCatch({ lme.zig(fixed     = model_formula,
                                zi_fixed  = ~ 1,
                                random    = ~ 1 | participant_id,
                                zi_random = NULL,
                                data      = dat_model) },
                      error = function(e) { return(NULL) })
  }
  
  return(model)
}
Code
# helper function to estimate the interaction models.
# Returns a list with all three estimated models and the respective three summary tables.
# Argument 'selected_model' is the label of the previously selected model type in the model selection process.
estimate_interactionModels <- function(dat_model, selected_model) {
  
  if (selected_model == "ZI Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:age_cat"))
    if (is.null(model_ageInt)) { # if model estimation failed
      model_ageInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    }
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:gender"))
    if (is.null(model_sexInt)) { # if model estimation failed
      model_sexInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    }
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:bmi_cat"))
    if (is.null(model_bmiInt)) { # if model estimation failed
      model_bmiInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    }
    # fiber interaction model
    model_fibInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:dailyFiber_cat"))
    if (is.null(model_fibInt)) { # if model estimation failed
      model_fibInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:dailyFiber_cat"))
    }
    # baseline diversity interaction model
    model_divInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:baselineDiv_cat"))
    if (is.null(model_divInt)) { # if model estimation failed
      model_divInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:baselineDiv_cat"))
    }
    
  } else if (selected_model == "ZI log-Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:age_cat"))
    if (is.null(model_ageInt)) { # if model estimation failed
      model_ageInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    }
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:gender"))
    if (is.null(model_sexInt)) { # if model estimation failed
      model_sexInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    }
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:bmi_cat"))
    if (is.null(model_bmiInt)) { # if model estimation failed
      model_bmiInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    }
    # fiber interaction model
    model_fibInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:dailyFiber_cat"))
    if (is.null(model_fibInt)) { # if model estimation failed
      model_fibInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:dailyFiber_cat"))
    }
    # baseline diversity interaction model
    model_divInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:baselineDiv_cat"))
    if (is.null(model_divInt)) { # if model estimation failed
      model_divInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:baselineDiv_cat"))
    }
    
  } else if (selected_model == "Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    
  } else { # model_label == "log-Gaussian"
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    # fiber interaction model
    model_fibInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:dailyFiber_cat"))
    # baseline diversity interaction model
    model_divInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:baselineDiv_cat"))
  }
  
  
  ### prepare results
  # age interaction model
  if (class(model_ageInt)[1] == "zigmm") {
    tab_ageInt           <- summary(model_ageInt)$tTable
    colnames(tab_ageInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_ageInt           <- summary(model_ageInt)$p.table
  }
  # sex interaction model
  if (class(model_sexInt)[1] == "zigmm") {
    tab_sexInt           <- summary(model_sexInt)$tTable
    colnames(tab_sexInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_sexInt           <- summary(model_sexInt)$p.table
  }
  # BMI interaction model
  if (class(model_bmiInt)[1] == "zigmm") {
    tab_bmiInt           <- summary(model_bmiInt)$tTable
    colnames(tab_bmiInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_bmiInt           <- summary(model_bmiInt)$p.table
  }
  # fiber interaction model
  if (class(model_fibInt)[1] == "zigmm") {
    tab_fibInt           <- summary(model_fibInt)$tTable
    colnames(tab_fibInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_fibInt           <- summary(model_fibInt)$p.table
  }
  # baseline diversity interaction model
  if (class(model_divInt)[1] == "zigmm") {
    tab_divInt           <- summary(model_divInt)$tTable
    colnames(tab_divInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_divInt           <- summary(model_divInt)$p.table
  }

  
  list(model_ageInt = model_ageInt,
       model_sexInt = model_sexInt,
       model_bmiInt = model_bmiInt,
       model_fibInt = model_fibInt,
       model_divInt = model_divInt,
       tab_ageInt   = tab_ageInt,
       tab_sexInt   = tab_sexInt,
       tab_bmiInt   = tab_bmiInt,
       tab_fibInt   = tab_fibInt,
       tab_divInt   = tab_divInt)
}
Code
# main screening function for species
runScreening_oneSpecies <- function(sp) {
  
  message("Process species ", sp, " (", match(sp, species), "/", length(species), ")")
  
  dat_sp <- dat[dat$FeatureID == sp,]
  
  # 1) main model estimation, based on model selection
  estimate_ZImodels <- sum(dat_sp$relAbd == 0) >= (0.1*nrow(dat_sp))
  if (estimate_ZImodels) { # estimate zero-inflated models
    
    # these zero-inflated models sometimes run into an estimation error. In this case, ignore the respective model.
    # If both models run into an estimation error, simply estimate the non-zero-inflated models instead.
    model_list <- list("ZI Gaussian"     = estimate_model(dat_model     = dat_sp,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25")),
                       "ZI log-Gaussian" = estimate_model(dat_model     = dat_sp,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25")))
    
    fallBack_toNonZImodels <- ifelse(is.null(model_list[[1]]) & is.null(model_list[[2]]), TRUE, FALSE)
    
    if (!fallBack_toNonZImodels) {
      
      if (is.null(model_list[[1]]) | is.null(model_list[[2]])) {
        null_index <- unname(which(sapply(model_list, function(x) { is.null(x) })))
        model_list <- model_list[-null_index]
      }
      
      bestModel_index <- unname(which.min(sapply(model_list, function(x) { mean(abs(summary(x)$residuals)) })))
      model           <- model_list[[bestModel_index]]
      model_label     <- names(model_list)[bestModel_index]
      tab             <- summary(model)$tTable
      colnames(tab)   <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
    }
    
  } else {
    fallBack_toNonZImodels <- FALSE # simply to have this object defined also if ZI models are not estimated
  }
  
  if (!estimate_ZImodels || fallBack_toNonZImodels) { # estimate models without zero-inflation
    
    model_list <- list("Gaussian"     = estimate_model(dat_model     = dat_sp,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("relAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")),
                       "log-Gaussian" = estimate_model(dat_model     = dat_sp,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("relAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")))
    
    bestModel_index <- unname(which.max(sapply(model_list, function(x) { summary(x)$dev.expl })))
    model           <- model_list[[bestModel_index]]
    model_label     <- names(model_list)[bestModel_index]
    tab             <- summary(model)$p.table
    
  }
  
  
  # 2) interaction model estimation, only for the final selected model
  # model estimation with the lower category of the interaction variable as the reference category
  model_intList <- estimate_interactionModels(dat_model      = dat_sp,
                                              selected_model = model_label)
  # model estimation with the higher category of the interaction variable as the reference category
  dat_sp_changedRef         <- dat_sp
  dat_sp_changedRef$gender  <- relevel(dat_sp_changedRef$gender,  ref = "W")
  dat_sp_changedRef$age_cat <- relevel(dat_sp_changedRef$age_cat, ref = "50-69")
  dat_sp_changedRef$bmi_cat <- relevel(dat_sp_changedRef$bmi_cat, ref = "[25, 31]")
  dat_sp_changedRef$dailyFiber_cat  <- relevel(dat_sp_changedRef$dailyFiber_cat,  ref = ">= 30g")
  dat_sp_changedRef$baselineDiv_cat <- relevel(dat_sp_changedRef$baselineDiv_cat, ref = "[3.85, 4.4)")
  model_intList_changedRef  <- estimate_interactionModels(dat_model      = dat_sp_changedRef,
                                                          selected_model = model_label)
  
  # create joint table
  tab_intEffects <- data.frame(effect_type    = c("fresh intervention", "pasteurized intervention"),
                               effect_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Estimate"]),
                               effect_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Estimate"]),
                               se_ageLow      = c(model_intList$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               se_ageHigh     = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               pvalue_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Estimate"]),
                               effect_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Estimate"]),
                               se_sexM        = c(model_intList$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               se_sexW        = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               pvalue_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               effect_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               se_bmiLow      = c(model_intList$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               se_bmiHigh     = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               pvalue_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_fibLow  = c(model_intList$tab_fibInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_fibInt["interventionPasteurized", "Estimate"]),
                               effect_fibHigh = c(model_intList_changedRef$tab_fibInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_fibInt["interventionPasteurized", "Estimate"]),
                               se_fibLow      = c(model_intList$tab_fibInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_fibInt["interventionPasteurized", "Std. Error"]),
                               se_fibHigh     = c(model_intList_changedRef$tab_fibInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_fibInt["interventionPasteurized", "Std. Error"]),
                               pvalue_fibLow  = c(model_intList$tab_fibInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_fibInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_fibHigh = c(model_intList_changedRef$tab_fibInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_fibInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_divLow  = c(model_intList$tab_divInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_divInt["interventionPasteurized", "Estimate"]),
                               effect_divHigh = c(model_intList_changedRef$tab_divInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_divInt["interventionPasteurized", "Estimate"]),
                               se_divLow      = c(model_intList$tab_divInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_divInt["interventionPasteurized", "Std. Error"]),
                               se_divHigh     = c(model_intList_changedRef$tab_divInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_divInt["interventionPasteurized", "Std. Error"]),
                               pvalue_divLow  = c(model_intList$tab_divInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_divInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_divHigh = c(model_intList_changedRef$tab_divInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_divInt["interventionPasteurized", "Pr(>|t|)"]))
  
  
  # 3) gather results
  data.frame(species                 = sp,
             relFreq_zeroRelAbd      = sum(dat_sp$relAbd == 0) / nrow(dat_sp),
             relFreq_min0.01relAbd   = sum(dat_sp$relAbd >= 0.01) / nrow(dat_sp),
             min_relAbd              = min(dat_sp$relAbd),
             mean_relAbd             = mean(dat_sp$relAbd),
             median_relAbd           = median(dat_sp$relAbd),
             max_relAbd              = max(dat_sp$relAbd),
             baseline_medianRelAbd   = median(dat_sp$relAbd[dat_sp$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")]),
             baseline_prevalence     = unname(prop.table(table(dat_sp$relAbd[dat_sp$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")] > 0.01))["TRUE"]),
             postFresh_medianRelAbd  = median(dat_sp$relAbd[dat_sp$inter_treat == "After.Fresh"]),
             postFresh_prevalence    = unname(prop.table(table(dat_sp$relAbd[dat_sp$inter_treat == "After.Fresh"] > 0.01))["TRUE"]),
             postPast_medianRelAbd   = median(dat_sp$relAbd[dat_sp$inter_treat == "After.Pasteurized"]),
             postPast_prevalence     = unname(prop.table(table(dat_sp$relAbd[dat_sp$inter_treat == "After.Pasteurized"] > 0.01))["TRUE"]),
             model                   = model_label,
             model_deviance          = ifelse(!grepl("ZI", model_label), summary(model)$dev.expl, NA),
             model_meanAbsResid      = ifelse(grepl("ZI", model_label), mean(abs(summary(model)$residuals)), NA),
             freshInt_effect         = tab["interventionFresh", "Estimate"],
             freshInt_se             = tab["interventionFresh", "Std. Error"],
             freshInt_pvalue         = tab["interventionFresh", "Pr(>|t|)"],
             pastInt_effect          = tab["interventionPasteurized", "Estimate"],
             pastInt_se              = tab["interventionPasteurized", "Std. Error"],
             pastInt_pvalue          = tab["interventionPasteurized", "Pr(>|t|)"],
             phaseEffect             = tab["phasephase 2", "Estimate"],
             genderEffect            = tab["genderW", "Estimate"],
             ageEffect               = tab["age_centered50", "Estimate"],
             bmiEffect               = tab["bmi_centered25", "Estimate"],
             freshInt_ageLow_effect  = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_effect = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_se      = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_se     = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_pvalue  = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_pvalue = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_effect    = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_effect    = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_se        = tab_intEffects$se_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_se        = tab_intEffects$se_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_pvalue    = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_pvalue    = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_effect  = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_effect = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_se      = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_se     = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_pvalue  = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_pvalue = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibLow_effect  = tab_intEffects$effect_fibLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibHigh_effect = tab_intEffects$effect_fibHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibLow_se      = tab_intEffects$se_fibLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibHigh_se     = tab_intEffects$se_fibHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibLow_pvalue  = tab_intEffects$pvalue_fibLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_fibHigh_pvalue = tab_intEffects$pvalue_fibHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divLow_effect  = tab_intEffects$effect_divLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divHigh_effect = tab_intEffects$effect_divHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divLow_se      = tab_intEffects$se_divLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divHigh_se     = tab_intEffects$se_divHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divLow_pvalue  = tab_intEffects$pvalue_divLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_divHigh_pvalue = tab_intEffects$pvalue_divHigh[tab_intEffects$effect_type == "fresh intervention"],
             pastInt_ageLow_effect   = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_effect  = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_se       = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_se      = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_pvalue   = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_pvalue  = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_effect     = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_effect     = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_se         = tab_intEffects$se_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_se         = tab_intEffects$se_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_pvalue     = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_pvalue     = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_effect   = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_effect  = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_se       = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_se      = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_pvalue   = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_pvalue  = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibLow_effect   = tab_intEffects$effect_fibLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibHigh_effect  = tab_intEffects$effect_fibHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibLow_se       = tab_intEffects$se_fibLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibHigh_se      = tab_intEffects$se_fibHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibLow_pvalue   = tab_intEffects$pvalue_fibLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_fibHigh_pvalue  = tab_intEffects$pvalue_fibHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divLow_effect   = tab_intEffects$effect_divLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divHigh_effect  = tab_intEffects$effect_divHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divLow_se       = tab_intEffects$se_divLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divHigh_se      = tab_intEffects$se_divHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divLow_pvalue   = tab_intEffects$pvalue_divLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_divHigh_pvalue  = tab_intEffects$pvalue_divHigh[tab_intEffects$effect_type == "pasteurized intervention"])
}
Code
# code for running the screening. It takes about 3 minutes on 15 cores
# to estimate the results. Instead of newly estimating them each time
# the html is generated, the results were saved after running the screening
# once, and are now just read from the results csv
dat_results <- read.csv("8_species_regressionScreening_results.csv")

# species <- dat %>% pull(FeatureID) %>% unique() %>% as.character()
# 
# # number of cores to use for the parallelized call
# n_cores <- 15
# 
# # create a cluster object for Windows parallelization
# cl <- makeCluster(n_cores)
# 
# # export necessary functions / data objects to the cluster workers
# clusterExport(cl, c("runScreening_oneSpecies", "estimate_model", "estimate_interactionModels",
#                     "species", "dat", "gam", "lme.zig", "summary.gam"))
# 
# # run the screening (parallelized call)
# datList_results <- parLapply(cl, species, runScreening_oneSpecies)
# 
# # stop the Windows cluster
# stopCluster(cl)
# 
# # merge results datasets
# dat_results <- datList_results %>% dplyr::bind_rows()
# 
# # perform multiple testing correction and round values in the data
# p_corrected <- p.adjust(c(dat_results$freshInt_pvalue, dat_results$pastInt_pvalue), method = "BH")
# pAgeInt_corrected <- p.adjust(c(dat_results$freshInt_ageLow_pvalue,  dat_results$pastInt_ageLow_pvalue,
#                                 dat_results$freshInt_ageHigh_pvalue, dat_results$pastInt_ageHigh_pvalue), method = "BH")
# pSexInt_corrected <- p.adjust(c(dat_results$freshInt_sexM_pvalue,  dat_results$pastInt_sexM_pvalue,
#                                 dat_results$freshInt_sexW_pvalue, dat_results$pastInt_sexW_pvalue), method = "BH")
# pBMIInt_corrected <- p.adjust(c(dat_results$freshInt_bmiLow_pvalue,  dat_results$pastInt_bmiLow_pvalue,
#                                 dat_results$freshInt_bmiHigh_pvalue, dat_results$pastInt_bmiHigh_pvalue), method = "BH")
# pFibInt_corrected <- p.adjust(c(dat_results$freshInt_fibLow_pvalue,  dat_results$pastInt_fibLow_pvalue,
#                                 dat_results$freshInt_fibHigh_pvalue, dat_results$pastInt_fibHigh_pvalue), method = "BH")
# pDivInt_corrected <- p.adjust(c(dat_results$freshInt_divLow_pvalue,  dat_results$pastInt_divLow_pvalue,
#                                 dat_results$freshInt_divHigh_pvalue, dat_results$pastInt_divHigh_pvalue), method = "BH")
# n           <- nrow(dat_results)
# dat_results <- dat_results %>%
#   mutate(freshInt_pvalue_correct          = p_corrected[1:n],
#          pastInt_pvalue_correct           = p_corrected[(n+1):(2*n)],
#          freshInt_ageLow_pvalue_correct   = pAgeInt_corrected[1:n],
#          pastInt_ageLow_pvalue_correct    = pAgeInt_corrected[(n+1):(2*n)],
#          freshInt_ageHigh_pvalue_correct  = pAgeInt_corrected[(2*n+1):(3*n)],
#          pastInt_ageHigh_pvalue_correct   = pAgeInt_corrected[(3*n+1):(4*n)],
#          freshInt_sexM_pvalue_correct     = pSexInt_corrected[1:n],
#          pastInt_sexM_pvalue_correct      = pSexInt_corrected[(n+1):(2*n)],
#          freshInt_sexW_pvalue_correct     = pSexInt_corrected[(2*n+1):(3*n)],
#          pastInt_sexW_pvalue_correct      = pSexInt_corrected[(3*n+1):(4*n)],
#          freshInt_bmiLow_pvalue_correct   = pBMIInt_corrected[1:n],
#          pastInt_bmiLow_pvalue_correct    = pBMIInt_corrected[(n+1):(2*n)],
#          freshInt_bmiHigh_pvalue_correct  = pBMIInt_corrected[(2*n+1):(3*n)],
#          pastInt_bmiHigh_pvalue_correct   = pBMIInt_corrected[(3*n+1):(4*n)],
#          freshInt_fibLow_pvalue_correct   = pFibInt_corrected[1:n],
#          pastInt_fibLow_pvalue_correct    = pFibInt_corrected[(n+1):(2*n)],
#          freshInt_fibHigh_pvalue_correct  = pFibInt_corrected[(2*n+1):(3*n)],
#          pastInt_fibHigh_pvalue_correct   = pFibInt_corrected[(3*n+1):(4*n)],
#          freshInt_divLow_pvalue_correct   = pDivInt_corrected[1:n],
#          pastInt_divLow_pvalue_correct    = pDivInt_corrected[(n+1):(2*n)],
#          freshInt_divHigh_pvalue_correct  = pDivInt_corrected[(2*n+1):(3*n)],
#          pastInt_divHigh_pvalue_correct   = pDivInt_corrected[(3*n+1):(4*n)]) %>%
#   mutate_at(c(2:13, 15:ncol(.)), function(x) { round(x, 4) })
# 
# write.csv(dat_results, file = "8_species_regressionScreening_results.csv", row.names = FALSE)

Results

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of species with relevant abundance",
                       "No. of significant species (fresh intervention)",
                       "No. of significant species (past. intervention)"),
           "value" = c(nrow(dat_results),
                       sum(dat_results$freshInt_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of species with relevant abundance 362
No. of significant species (fresh intervention) 1
No. of significant species (past. intervention) 2
Overview which specific species are significant
Code
dat_results %>% 
  filter((freshInt_pvalue_correct < 0.1) |
           (pastInt_pvalue_correct < 0.1)) %>% 
  dplyr::select(-ends_with("pvalue")) %>% 
  mutate(across(contains("pvalue"), function(x) { ifelse(x < 0.1, x, "not significant") })) %>% 
  dplyr::select(species, contains("pvalue")) %>% 
  arrange(freshInt_pvalue_correct, pastInt_pvalue_correct) %>% 
  kable() %>% 
  kable_styling()
species freshInt_pvalue_correct pastInt_pvalue_correct freshInt_ageLow_pvalue_correct pastInt_ageLow_pvalue_correct freshInt_ageHigh_pvalue_correct pastInt_ageHigh_pvalue_correct freshInt_sexM_pvalue_correct pastInt_sexM_pvalue_correct freshInt_sexW_pvalue_correct pastInt_sexW_pvalue_correct freshInt_bmiLow_pvalue_correct pastInt_bmiLow_pvalue_correct freshInt_bmiHigh_pvalue_correct pastInt_bmiHigh_pvalue_correct freshInt_fibLow_pvalue_correct pastInt_fibLow_pvalue_correct freshInt_fibHigh_pvalue_correct pastInt_fibHigh_pvalue_correct freshInt_divLow_pvalue_correct pastInt_divLow_pvalue_correct freshInt_divHigh_pvalue_correct pastInt_divHigh_pvalue_correct
species:Lacticaseibacillus_paracasei 0 not significant 0 not significant 0.0393 not significant 0.0005 not significant 0 not significant 0 not significant 0.0127 not significant 0 not significant 0.0029 not significant 0 not significant 0 not significant
species:Anaerostipes_hadrus not significant 0.0436 not significant 0.0247 not significant not significant not significant not significant not significant not significant not significant 0.0926 not significant not significant not significant 0.0832 not significant not significant not significant not significant not significant not significant
species:Blautia_obeum not significant 0.0746 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant

… alternatively, looking at the p-values before correction for multiple testing:

Code
data.frame("what"  = c("No. of species with relevant abundance",
                       "No. of significant species (fresh intervention)",
                       "No. of significant species (past. intervention)"),
           "value" = c(nrow(dat_results),
                       sum(dat_results$freshInt_pvalue < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_pvalue < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of species with relevant abundance 362
No. of significant species (fresh intervention) 40
No. of significant species (past. intervention) 48
Code
dat_results %>% 
  filter((freshInt_pvalue < 0.1) |
           (pastInt_pvalue < 0.1)) %>% 
  mutate(across(ends_with("pvalue"), function(x) { ifelse(x < 0.1, x, "not significant") })) %>% 
  dplyr::select(species, ends_with("pvalue")) %>% 
  arrange(freshInt_pvalue, pastInt_pvalue) %>% 
  kable() %>% 
  kable_styling()
species freshInt_pvalue pastInt_pvalue freshInt_ageLow_pvalue freshInt_ageHigh_pvalue freshInt_sexM_pvalue freshInt_sexW_pvalue freshInt_bmiLow_pvalue freshInt_bmiHigh_pvalue freshInt_fibLow_pvalue freshInt_fibHigh_pvalue freshInt_divLow_pvalue freshInt_divHigh_pvalue pastInt_ageLow_pvalue pastInt_ageHigh_pvalue pastInt_sexM_pvalue pastInt_sexW_pvalue pastInt_bmiLow_pvalue pastInt_bmiHigh_pvalue pastInt_fibLow_pvalue pastInt_fibHigh_pvalue pastInt_divLow_pvalue pastInt_divHigh_pvalue
species:Lacticaseibacillus_paracasei 0 not significant 0 0.0001 0 0 0 0 0 0 0 0 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Erysipelotrichaceae_bacterium 0.0068 not significant not significant 0.0075 0.0018 not significant 0.0395 0.0783 0.0222 not significant 0.093 0.0325 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB9512_SGB14909 0.0072 not significant 0.0042 not significant 0.0372 0.0757 not significant 0.0002 0.0787 0.0081 0.0009 not significant not significant not significant not significant not significant not significant not significant not significant 0.0393 not significant not significant
species:Blautia_glucerasea 0.0122 0.0007 not significant 0.0071 not significant 0.0539 not significant 0.0399 0.0183 not significant not significant 0.0165 0.0043 0.0557 0.0777 0.0032 0.0009 not significant 0.0024 not significant 0.0244 0.011
species:Clostridia_bacterium_UC5_1_1D1 0.0148 not significant 0.019 not significant not significant 0.0207 0.0958 0.0665 0.0149 not significant 0.0023 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Eubacterium_sp_AF15_50 0.0181 not significant not significant 0.0154 0.0074 not significant 0.0533 not significant 0.0325 not significant not significant 0.0167 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB51959_SGB72479 0.0251 not significant not significant not significant not significant 0.0375 0.0242 not significant 0.0668 not significant 0.0043 not significant not significant not significant not significant not significant 0.0465 not significant not significant 0.0091 not significant not significant
species:Bacteroidaceae_bacterium 0.0258 not significant not significant 0.0263 0.0021 not significant not significant 0.0013 0.0114 not significant 0.0969 not significant not significant not significant not significant not significant not significant 0.0783 not significant 0.089 not significant not significant
species:Anaerotignum_faecicola 0.0283 not significant 0.0408 not significant not significant 0.0394 not significant 0.0995 0.0187 not significant 0.048 not significant not significant not significant 0.0038 not significant not significant 0.0439 not significant 0.0968 not significant 0.0522
species:GGB6649_SGB9391 0.0288 not significant not significant 0.0375 not significant 0.0663 0.0318 not significant not significant 0.0958 not significant 0.0159 not significant 0.081 not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB4651_SGB6438 0.0329 not significant 0.0547 not significant not significant 0.0444 0.0634 not significant not significant 0.0415 not significant 0.0499 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Eisenbergiella_tayi 0.0333 not significant not significant 0.0555 not significant 0.0476 not significant not significant 0.0282 not significant 0.0386 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Blautia_sp_OF03_15BH 0.034 not significant not significant not significant not significant 0.0665 not significant not significant not significant 0.019 not significant 0.084 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Candidatus_Avimicrobium_caecorum 0.0347 not significant not significant not significant not significant not significant not significant 0.0223 0.0423 not significant not significant 0.0429 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB9561_SGB14972 0.0366 not significant not significant 0.0001 not significant 0.0028 0.0963 not significant not significant 0.0293 not significant 0.0108 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Mediterraneibacter_butyricigenes 0.0388 not significant 0.085 not significant not significant 0.0114 0.0153 not significant 0.0102 not significant 0.0703 not significant not significant not significant not significant not significant 0.0814 not significant not significant not significant not significant 0.073
species:Bacteroides_finegoldii 0.0415 not significant not significant 0.0006 not significant not significant 0.0104 not significant 0.0377 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Veillonella_rogosae 0.0415 not significant 0.0656 not significant 0.0193 not significant 0.0141 not significant not significant 0.0153 not significant 0.0495 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Holdemania_filiformis 0.043 not significant 0.0597 not significant not significant 0.0451 not significant 0.0225 0.0104 not significant 0.0926 not significant not significant 0.029 not significant 0.0752 not significant not significant 0.0703 not significant not significant not significant
species:Dialister_invisus 0.0439 not significant not significant 0.041 not significant 0.0527 not significant 0.0065 not significant 0.0067 0.0203 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB9787_SGB15410 0.0466 not significant not significant not significant not significant not significant not significant 0.0146 0.0306 not significant not significant 0.0458 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Bacteroides_fragilis 0.0495 not significant not significant 0.0192 not significant 0.0396 0.0167 not significant not significant not significant 0.0361 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB9709_SGB15238 0.0511 not significant not significant 0.0039 not significant 0.0225 not significant 0.007 0.0335 not significant 0.0494 not significant not significant 0.0341 not significant not significant not significant not significant not significant not significant not significant not significant
species:Senegalimassilia_faecalis 0.0631 not significant not significant 0.0541 not significant not significant not significant 0.0369 0.0318 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Erysipelatoclostridium_ramosum 0.064 not significant not significant 0.0045 not significant not significant 0.0115 not significant not significant 0.008 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Ruminococcus_SGB4421 0.0641 0.0797 not significant not significant not significant 0.0323 not significant not significant not significant 0.0333 not significant 0.0209 not significant not significant not significant 0.0826 not significant 0.0479 not significant not significant not significant not significant
species:Gordonibacter_urolithinfaciens 0.0662 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0425 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Clostridia_unclassified_SGB14844 0.0674 not significant not significant 0.0223 not significant not significant 0.0293 not significant not significant 0.0263 0.0369 not significant not significant not significant not significant not significant not significant not significant not significant 0.0553 not significant not significant
species:Phascolarctobacterium_faecium 0.0674 not significant not significant 0.0955 0.0397 not significant not significant 0.0203 not significant 0.0036 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Roseburia_inulinivorans 0.0775 0.0177 not significant 0.0376 not significant not significant not significant 0.0363 0.0661 not significant not significant not significant 0.0415 not significant 0.0403 not significant not significant 0.056 0.0064 not significant 0.0371 not significant
species:Clostridium_sp_AF27_2AA 0.0859 not significant not significant not significant not significant not significant not significant not significant 0.0441 not significant 0.0107 not significant not significant 0.0288 0.0066 not significant not significant 0.0056 not significant not significant not significant not significant
species:Intestinibacter_SGB6139 0.0865 not significant not significant 0.0221 not significant not significant not significant 0.0522 0.057 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:GGB9581_SGB14999 0.0879 not significant not significant not significant not significant 0.0408 0.0363 not significant 0.0891 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Agathobaculum_butyriciproducens 0.0903 not significant 0.0712 not significant not significant not significant not significant not significant not significant not significant 0.0884 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Eubacterium_sp_AF34_35BH 0.091 not significant 0.0083 not significant not significant 0.0177 not significant not significant 0.0332 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Blautia_faecis 0.0928 0.0838 not significant not significant 0.0627 not significant not significant 0.0174 not significant not significant not significant not significant 0.0981 not significant not significant not significant not significant 0.0003 0.0505 not significant 0.0005 not significant
species:Bacteroidales_unclassified_SGB2135 0.0966 not significant not significant 0.0659 not significant not significant 0.0497 not significant not significant 0.0576 not significant 0.0775 not significant not significant not significant not significant not significant not significant not significant 0.0933 not significant not significant
species:Coprococcus_eutactus 0.0969 0.0578 not significant 0.0559 not significant not significant not significant not significant 0.0963 not significant not significant not significant not significant not significant not significant 0.0323 not significant not significant 0.0555 not significant not significant not significant
species:Veillonella_parvula 0.0986 not significant not significant not significant 0.0398 not significant not significant not significant not significant 0.005 not significant 0.0451 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Phocaeicola_dorei 0.0991 not significant not significant 0.0001 not significant 0.0172 not significant not significant not significant not significant 0.0405 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant
species:Anaerostipes_hadrus not significant 0.0001 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0 not significant 0.0023 0.0139 0.0003 not significant 0.0002 not significant 0.0085 0.0047
species:Blautia_obeum not significant 0.0003 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0234 0.0039 0.089 0.0009 0.0006 not significant 0.0012 not significant 0.0005 0.0871
species:Anaerobutyricum_hallii not significant 0.0007 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0105 0.0248 0.0495 0.0051 0.0095 0.0273 0 not significant 0.0007 not significant
species:Ruminococcaceae_unclassified_SGB4425 not significant 0.001 not significant 0.0686 0.0943 not significant not significant 0.0399 not significant not significant 0.086 not significant 0.0151 not significant 0.0094 0.0316 0.0074 not significant 0.0004 not significant not significant 0.003
species:Blautia_sp_AF19_10LB not significant 0.0014 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0023 not significant 0.0452 0.0131 0.0292 0.0151 0.0022 not significant 0.0053 0.083
species:GGB9631_SGB15085 not significant 0.0021 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0005 not significant 0.0062 not significant 0.0016 0.002 not significant 0.0571 0.0151
species:Lachnospira_sp_NSJ_43 not significant 0.0043 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0089 not significant 0.0002 not significant 0.0062 not significant 0.0426 0.0145 not significant 0.0104
species:Coprococcus_comes not significant 0.0066 0.0748 not significant 0.0936 not significant not significant not significant not significant not significant 0.0456 not significant 0.0036 not significant 0.0265 0.0949 0.0242 not significant 0.033 0.063 0.0606 0.0496
species:Ruminococcus_sp_AF41_9 not significant 0.0072 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0198 not significant 0.0022 not significant 0.0581 0.0518 0.0159 not significant 0.0566 0.0576
species:GGB3570_SGB4777 not significant 0.0088 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.023 not significant 0.0178 not significant 0.0204 not significant 0.021 not significant not significant 0.0017
species:Dorea_sp_AF24_7LB not significant 0.0099 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0457 not significant 0.0315 not significant 0.0385 not significant 0.022 not significant not significant 0.0227
species:GGB9603_SGB15035 not significant 0.0176 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0002 not significant 0.0136 not significant 0.0049 0.009 not significant not significant 0.0037
species:Dorea_longicatena not significant 0.02 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0907 not significant 0.095 not significant 0.0376 not significant 0.0218 not significant not significant 0.0132
species:Blautia_SGB4815 not significant 0.0214 not significant not significant not significant not significant not significant not significant not significant 0.0421 not significant not significant 0.0413 not significant 0.0054 not significant 0.0175 not significant not significant 0.0325 not significant 0.017
species:Gemmiger_formicilis not significant 0.0315 not significant not significant not significant 0.0451 0.044 not significant not significant not significant 0.0051 not significant not significant 0.0787 not significant 0.0797 0.0434 not significant 0.0546 not significant 0.0062 not significant
species:Faecalibacillus_faecis not significant 0.0332 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0629 not significant 0.0836 0.0191 not significant 0.0781 not significant 0.02 not significant
species:Blautia_sp_MSK_21_1 not significant 0.0374 not significant not significant not significant not significant not significant not significant not significant not significant 0.079 0.0758 not significant 0.0584 0.0653 not significant 0.0186 not significant 0.0384 not significant not significant 0.0004
species:Clostridia_unclassified_SGB3983 not significant 0.0375 not significant 0.0807 not significant 0.0768 not significant not significant not significant not significant not significant not significant not significant 0.002 0.0447 not significant not significant 0.0477 0.0415 not significant not significant 0.0036
species:Clostridia_bacterium not significant 0.0404 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0081 0.0887 not significant 0.0759 not significant 0.0001 not significant
species:Collinsella_aerofaciens not significant 0.0412 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0687 not significant 0.0398 not significant not significant not significant not significant 0.06
species:Streptococcus_sp_A12 not significant 0.0443 not significant not significant not significant 0.0375 0.0092 not significant not significant 0.054 0.0262 not significant 0.0407 not significant not significant 0.0342 0.0178 not significant not significant 0.0748 0.0357 not significant
species:Parolsenella_catena not significant 0.0448 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0378 0.0027 not significant 0.0458 not significant not significant 0.0786 not significant 0.0073
species:Ruminococcaceae_unclassified_SGB15236 not significant 0.0449 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0091 not significant not significant not significant 0.0252 0.0393 not significant not significant not significant
species:GGB3111_SGB4123 not significant 0.0461 not significant not significant not significant not significant 0.0392 not significant 0.0685 not significant not significant 0.0648 0.0147 not significant 0.0019 not significant 0.0615 not significant 0.063 not significant not significant 0.0056
species:Lachnospiraceae_bacterium_NSJ_46 not significant 0.0471 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0458 not significant not significant 0.077 not significant not significant not significant not significant 0.0569 not significant
species:Clostridium_fessum not significant 0.0511 not significant not significant not significant not significant not significant not significant 0.0496 0.0342 not significant not significant not significant not significant 0.0977 not significant 0.0885 not significant not significant not significant not significant not significant
species:GGB9621_SGB15073 not significant 0.0532 not significant 0.0981 not significant not significant not significant not significant not significant not significant 0.0333 not significant 0.0829 not significant not significant 0.0669 0.0285 not significant 0.0366 not significant 0.0105 not significant
species:Clostridia_unclassified_SGB4121 not significant 0.0555 not significant not significant not significant not significant not significant not significant not significant 0.0894 not significant not significant 0.0086 not significant not significant not significant not significant 0.048 0.0575 not significant not significant not significant
species:Faecalibacterium_SGB15346 not significant 0.0559 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0166 not significant not significant 0.0693 not significant not significant 0.0296 not significant 0.0781 not significant
species:Bacilli_unclassified_SGB6422 not significant 0.0576 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0457 not significant not significant 0.0217 not significant 0.0511 not significant not significant not significant
species:Prevotella_marseillensis not significant 0.0606 not significant not significant not significant not significant 0.0418 not significant not significant not significant not significant 0.0263 0.0824 not significant not significant not significant not significant not significant 0.0388 not significant not significant not significant
species:Blautia_faecicola not significant 0.0609 not significant 0.0228 not significant not significant not significant 0.0591 not significant 0.0856 not significant 0.0982 not significant 0.0308 not significant 0.0201 not significant not significant not significant not significant not significant 0.0149
species:GGB45432_SGB63101 not significant 0.0621 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0531 0.013 not significant 0.0303 not significant not significant 0.0847
species:Bacilli_unclassified_SGB6473 not significant 0.0657 not significant not significant not significant not significant 0.0373 not significant not significant not significant not significant not significant 0.0307 not significant not significant 0.0047 not significant 0.0923 0.0293 not significant 0.0018 not significant
species:Lachnospiraceae_bacterium_OF09_6 not significant 0.0706 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0627 not significant not significant not significant not significant 0.0543 not significant not significant not significant not significant
species:Lachnospira_eligens not significant 0.0715 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.044 not significant not significant 0.0672 not significant not significant not significant 0.0343
species:Clostridium_saccharogumia not significant 0.0722 0.0948 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0889 not significant not significant not significant 0.0766 not significant 0.0633 not significant 0.0394 not significant
species:Eubacterium_ramulus not significant 0.076 0.0306 not significant 0.0494 not significant not significant not significant 0.0138 0.0952 0.049 not significant 0.0717 not significant 0.0433 not significant not significant not significant 0.0518 not significant 0.0885 not significant
species:Streptococcus_salivarius not significant 0.0828 not significant not significant not significant not significant 0.0249 not significant not significant 0.0918 not significant not significant 0.055 not significant 0.0821 not significant 0.0441 not significant not significant not significant not significant not significant
species:Lachnospira_SGB5076 not significant 0.0834 not significant not significant not significant not significant not significant not significant not significant 0.0065 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.052 0.0404 not significant
species:Bifidobacterium_catenulatum not significant 0.0859 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0416 not significant not significant not significant 0.0085 not significant not significant 0.0129 not significant
species:GGB9778_SGB15398 not significant 0.0907 not significant 0.0649 not significant 0.0392 0.0628 not significant 0.0773 not significant not significant not significant not significant 0.0908 not significant 0.0196 0.0238 not significant 0.0397 not significant 0.0257 not significant
species:GGB9636_SGB15107 not significant 0.0987 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0153 not significant not significant not significant not significant 0.0368
Figures for publication

Inspiration for adding the phylogenetic tree: https://stackoverflow.com/questions/52281227/adding-a-ggtree-object-to-already-existing-ggplot-with-shared-y-axis

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_pvalue_correct < 0.1) | (pastInt_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 2),
                       baseline_medianRelAbd  = rep(dat_signif$baseline_medianRelAbd,  times = 2),
                       baseline_prevalence    = rep(dat_signif$baseline_prevalence,    times = 2),
                       postFresh_medianRelAbd = rep(dat_signif$postFresh_medianRelAbd, times = 2),
                       postFresh_prevalence   = rep(dat_signif$postFresh_prevalence,   times = 2),
                       postPast_medianRelAbd  = rep(dat_signif$postPast_medianRelAbd,  times = 2),
                       postPast_prevalence    = rep(dat_signif$postPast_prevalence,    times = 2),
                       model         = rep(dat_signif$model, times = 2),
                       intervention  = rep(c("Fresh intervention", "Pasteurized intervention"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_effect, dat_signif$pastInt_effect),
                       qvalue        = c(dat_signif$freshInt_pvalue_correct, dat_signif$pastInt_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_effect, times = 2)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the median rel. abd.'s and the prevalences as separate 'interventions' and 'effect_labels' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.\n(prevalence)",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label_relAbd = case_when(dat_spec$baseline_medianRelAbd[1] < .0001 ~ "<0.0001%",
                                                                dat_spec$baseline_medianRelAbd[1] < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd[1], 4), "%"),
                                                                dat_spec$baseline_medianRelAbd[1] < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd[1], 3), "%"),
                                                                dat_spec$baseline_medianRelAbd[1] < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd[1], 2), "%"),
                                                                dat_spec$baseline_medianRelAbd[1] < 10    ~ paste0(round(dat_spec$baseline_medianRelAbd[1], 1), "%"),
                                                                TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd[1], 0), "%")),
                                effect_label_prevalence = case_when(dat_spec$baseline_prevalence[1] < 0.01 ~ paste0(round(100 * dat_spec$baseline_prevalence[1], 1), "%"),
                                                                    TRUE                                   ~ paste0(round(100 * dat_spec$baseline_prevalence[1], 0), "%"))) %>% 
                       mutate(effect_label = paste0(effect_label_relAbd, "\n(", effect_label_prevalence, ")"))) %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "After fresh median rel. abd.\n(prevalence)",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label_relAbd = case_when(dat_spec$postFresh_medianRelAbd[1] < .0001 ~ "<0.0001%",
                                                                dat_spec$postFresh_medianRelAbd[1] < .01   ~ paste0(round(dat_spec$postFresh_medianRelAbd[1], 4), "%"),
                                                                dat_spec$postFresh_medianRelAbd[1] < .1    ~ paste0(round(dat_spec$postFresh_medianRelAbd[1], 3), "%"),
                                                                dat_spec$postFresh_medianRelAbd[1] < 1     ~ paste0(round(dat_spec$postFresh_medianRelAbd[1], 2), "%"),
                                                                dat_spec$postFresh_medianRelAbd[1] < 10    ~ paste0(round(dat_spec$postFresh_medianRelAbd[1], 1), "%"),
                                                                TRUE ~ paste0(round(dat_spec$postFresh_medianRelAbd[1], 0), "%")),
                                effect_label_prevalence = case_when(dat_spec$postFresh_prevalence[1] < 0.01 ~ paste0(round(100 * dat_spec$postFresh_prevalence[1], 1), "%"),
                                                                    TRUE                                    ~ paste0(round(100 * dat_spec$postFresh_prevalence[1], 0), "%"))) %>%
                       mutate(effect_label = paste0(effect_label_relAbd, "\n(", effect_label_prevalence, ")"))) %>%
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "After past. median rel. abd.\n(prevalence)",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label_relAbd = case_when(dat_spec$postPast_medianRelAbd[1] < .0001 ~ "<0.0001%",
                                                                dat_spec$postPast_medianRelAbd[1] < .01   ~ paste0(round(dat_spec$postPast_medianRelAbd[1], 4), "%"),
                                                                dat_spec$postPast_medianRelAbd[1] < .1    ~ paste0(round(dat_spec$postPast_medianRelAbd[1], 3), "%"),
                                                                dat_spec$postPast_medianRelAbd[1] < 1     ~ paste0(round(dat_spec$postPast_medianRelAbd[1], 2), "%"),
                                                                dat_spec$postPast_medianRelAbd[1] < 10    ~ paste0(round(dat_spec$postPast_medianRelAbd[1], 1), "%"),
                                                                TRUE ~ paste0(round(dat_spec$postPast_medianRelAbd[1], 0), "%")),
                                effect_label_prevalence = case_when(dat_spec$postPast_prevalence[1] < 0.01 ~ paste0(round(100 * dat_spec$postPast_prevalence[1], 1), "%"),
                                                                    TRUE                                   ~ paste0(round(100 * dat_spec$postPast_prevalence[1], 0), "%"))) %>%
                       mutate(effect_label = paste0(effect_label_relAbd, "\n(", effect_label_prevalence, ")")))
})
dat_plot_overall <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.\n(prevalence)",
                                                        "After fresh median rel. abd.\n(prevalence)",
                                                        "After past. median rel. abd.\n(prevalence)",
                                                        "Fresh intervention", "Pasteurized intervention")),
         text_size    = case_when(grepl("intervention", intervention) ~ 4,
                                  TRUE                                ~ 3),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_qvalues <- dat_plot_overall %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, size = text_size)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  scale_size(range = c(3, 4), guide = "none") +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_qvalues

Code
# ggsave("functional_speciesScreening_qvalues.png", width = 14, height = 3)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_pvalue < 0.05) | (pastInt_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 2),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 2),
                       model         = rep(dat_signif$model, times = 2),
                       intervention  = rep(c("Fresh intervention", "Pasteurized intervention"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_effect, dat_signif$pastInt_effect),
                       pvalue        = c(dat_signif$freshInt_pvalue, dat_signif$pastInt_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_effect, times = 2)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species        = factor(species, levels = rev(unique(species))),
         intervention   = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention", "Pasteurized intervention")),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# # color-code the tree
# dat_treeColors <- data.frame(node = c(49, 60, 66),
#                              type = c("A", "B", "C"))
# gg_tree <- gg_tree +
#   geom_hilight(data  = dat_treeColors, aes(node = node, fill = type),
#                type  = "roundrect",
#                alpha = 0.2) +
#   theme(legend.position = "none")

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>%
  gsub(pattern = "species:", replacement = "") %>%
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
gg_pvalues <- (gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))
gg_pvalues

Code
# ggsave("FigureS11_speciesScreening_pvalues.png", width = 12, height = 12)

Results for interaction effects

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of species with relevant abundance",
                       "No. of significant species (fresh intervention - age low)",
                       "No. of significant species (fresh intervention - age high)",
                       "No. of significant species (past. intervention - age low)",
                       "No. of significant species (past. intervention - age high)",
                       "No. of significant species (fresh intervention - gender male)",
                       "No. of significant species (fresh intervention - gender female)",
                       "No. of significant species (past. intervention - gender male)",
                       "No. of significant species (past. intervention - gender female)",
                       "No. of significant species (fresh intervention - BMI low)",
                       "No. of significant species (fresh intervention - BMI high)",
                       "No. of significant species (past. intervention - BMI low)",
                       "No. of significant species (past. intervention - BMI high)",
                       "No. of significant species (fresh intervention - fiber low)",
                       "No. of significant species (fresh intervention - fiber high)",
                       "No. of significant species (past. intervention - fiber low)",
                       "No. of significant species (past. intervention - fiber high)",
                       "No. of significant species (fresh intervention - baseline diversity low)",
                       "No. of significant species (fresh intervention - baseline diversity high)",
                       "No. of significant species (past. intervention - baseline diversity low)",
                       "No. of significant species (past. intervention - baseline diversity high)"),
           "value" = c(nrow(dat_results),
                       sum(dat_results$freshInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_fibLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_fibHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_fibLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_fibHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_divLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$freshInt_divHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_divLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_results$pastInt_divHigh_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of species with relevant abundance 362
No. of significant species (fresh intervention - age low) 1
No. of significant species (fresh intervention - age high) 3
No. of significant species (past. intervention - age low) 1
No. of significant species (past. intervention - age high) 2
No. of significant species (fresh intervention - gender male) 1
No. of significant species (fresh intervention - gender female) 1
No. of significant species (past. intervention - gender male) 0
No. of significant species (past. intervention - gender female) 0
No. of significant species (fresh intervention - BMI low) 1
No. of significant species (fresh intervention - BMI high) 2
No. of significant species (past. intervention - BMI low) 1
No. of significant species (past. intervention - BMI high) 1
No. of significant species (fresh intervention - fiber low) 1
No. of significant species (fresh intervention - fiber high) 1
No. of significant species (past. intervention - fiber low) 2
No. of significant species (past. intervention - fiber high) 0
No. of significant species (fresh intervention - baseline diversity low) 1
No. of significant species (fresh intervention - baseline diversity high) 1
No. of significant species (past. intervention - baseline diversity low) 1
No. of significant species (past. intervention - baseline diversity high) 0
… alternatively, the numbers of significant genes based on uncorrected p-values
Code
data.frame("what"  = c("No. of species with relevant abundance",
                       "No. of significant species (fresh intervention - age low)",
                       "No. of significant species (fresh intervention - age high)",
                       "No. of significant species (past. intervention - age low)",
                       "No. of significant species (past. intervention - age high)",
                       "No. of significant species (fresh intervention - gender male)",
                       "No. of significant species (fresh intervention - gender female)",
                       "No. of significant species (past. intervention - gender male)",
                       "No. of significant species (past. intervention - gender female)",
                       "No. of significant species (fresh intervention - BMI low)",
                       "No. of significant species (fresh intervention - BMI high)",
                       "No. of significant species (past. intervention - BMI low)",
                       "No. of significant species (past. intervention - BMI high)",
                       "No. of significant species (fresh intervention - fiber low)",
                       "No. of significant species (fresh intervention - fiber high)",
                       "No. of significant species (past. intervention - fiber low)",
                       "No. of significant species (past. intervention - fiber high)",
                       "No. of significant species (fresh intervention - baseline diversity low)",
                       "No. of significant species (fresh intervention - baseline diversity high)",
                       "No. of significant species (past. intervention - baseline diversity low)",
                       "No. of significant species (past. intervention - baseline diversity high)"),
           "value" = c(nrow(dat_results),
                       sum(dat_results$freshInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_bmiHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_bmiHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_fibLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_fibHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_fibLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_fibHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_divLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$freshInt_divHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_divLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_results$pastInt_divHigh_pvalue < 0.05, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of species with relevant abundance 362
No. of significant species (fresh intervention - age low) 14
No. of significant species (fresh intervention - age high) 27
No. of significant species (past. intervention - age low) 26
No. of significant species (past. intervention - age high) 21
No. of significant species (fresh intervention - gender male) 19
No. of significant species (fresh intervention - gender female) 24
No. of significant species (past. intervention - gender male) 21
No. of significant species (past. intervention - gender female) 24
No. of significant species (fresh intervention - BMI low) 25
No. of significant species (fresh intervention - BMI high) 23
No. of significant species (past. intervention - BMI low) 32
No. of significant species (past. intervention - BMI high) 21
No. of significant species (fresh intervention - fiber low) 22
No. of significant species (fresh intervention - fiber high) 21
No. of significant species (past. intervention - fiber low) 29
No. of significant species (past. intervention - fiber high) 18
No. of significant species (fresh intervention - baseline diversity low) 23
No. of significant species (fresh intervention - baseline diversity high) 21
No. of significant species (past. intervention - baseline diversity low) 20
No. of significant species (past. intervention - baseline diversity high) 26

baseline diversity

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue_correct < 0.1) | (freshInt_divHigh_pvalue_correct < 0.1) |
           (pastInt_divLow_pvalue_correct < 0.1) | (pastInt_divHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(baseline div. low)", "Fresh intervention\n(baseline div. high)",
                                             "Pasteurized intervention\n(baseline div. low)", "Pasteurized intervention\n(baseline div. high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_divLow_effect, dat_signif$freshInt_divHigh_effect,
                                         dat_signif$pastInt_divLow_effect,  dat_signif$pastInt_divHigh_effect),
                       qvalue        = c(dat_signif$freshInt_divLow_pvalue_correct, dat_signif$freshInt_divHigh_pvalue_correct,
                                         dat_signif$pastInt_divLow_pvalue_correct,  dat_signif$pastInt_divHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_divLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_divInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(baseline div. low)", "Fresh intervention\n(baseline div. high)",
                                                        "Pasteurized intervention\n(baseline div. low)", "Pasteurized intervention\n(baseline div. high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_divInt_qvalue <- dat_plot_divInt %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_divInt_qvalue

Code
# ggsave("functional_speciesScreening_divInteractions_qvalues.png", width = 12, height = 3)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(baseline div. low)", "Fresh intervention\n(baseline div. high)",
                                             "Pasteurized intervention\n(baseline div. low)", "Pasteurized intervention\n(baseline div. high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_divLow_effect, dat_signif$freshInt_divHigh_effect,
                                         dat_signif$pastInt_divLow_effect,  dat_signif$pastInt_divHigh_effect),
                       pvalue        = c(dat_signif$freshInt_divLow_pvalue, dat_signif$freshInt_divHigh_pvalue,
                                         dat_signif$pastInt_divLow_pvalue,  dat_signif$pastInt_divHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_divLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = gsub(intervention, pattern = "low",  replacement = "[2.8, 3.85)"),
         intervention = gsub(intervention, pattern = "high", replacement = "[3.85, 4.4)"),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.",
                                                        "Fresh intervention\n(baseline div. [2.8, 3.85))",
                                                        "Fresh intervention\n(baseline div. [3.85, 4.4))",
                                                        "Pasteurized intervention\n(baseline div. [2.8, 3.85))",
                                                        "Pasteurized intervention\n(baseline div. [3.85, 4.4))")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
(gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))

Age

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_ageLow_pvalue_correct < 0.1) | (freshInt_ageHigh_pvalue_correct < 0.1) |
           (pastInt_ageLow_pvalue_correct < 0.1) | (pastInt_ageHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       qvalue        = c(dat_signif$freshInt_ageLow_pvalue_correct, dat_signif$freshInt_ageHigh_pvalue_correct,
                                         dat_signif$pastInt_ageLow_pvalue_correct,  dat_signif$pastInt_ageHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_ageInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                                        "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_ageInt_qvalue <- dat_plot_ageInt %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_ageInt_qvalue

Code
# ggsave("functional_speciesScreening_ageInteractions_qvalues.png", width = 12, height = 4)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       pvalue        = c(dat_signif$freshInt_ageLow_pvalue, dat_signif$freshInt_ageHigh_pvalue,
                                         dat_signif$pastInt_ageLow_pvalue,  dat_signif$pastInt_ageHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = gsub(intervention, pattern = "age low",  replacement = "age 21-49"),
         intervention = gsub(intervention, pattern = "age high", replacement = "age 50-69"),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.",
                                                        "Fresh intervention\n(age 21-49)",
                                                        "Fresh intervention\n(age 50-69)",
                                                        "Pasteurized intervention\n(age 21-49)",
                                                        "Pasteurized intervention\n(age 50-69)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
(gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))

Gender

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_sexM_pvalue_correct < 0.1) | (freshInt_sexW_pvalue_correct < 0.1) |
           (pastInt_sexM_pvalue_correct < 0.1) | (pastInt_sexW_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       qvalue        = c(dat_signif$freshInt_sexM_pvalue_correct, dat_signif$freshInt_sexW_pvalue_correct,
                                         dat_signif$pastInt_sexM_pvalue_correct,  dat_signif$pastInt_sexW_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_sexInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                                        "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_sexInt_qvalue <- dat_plot_sexInt %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_sexInt_qvalue

Code
# ggsave("functional_speciesScreening_sexInteractions_qvalues.png", width = 12, height = 1.5)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       pvalue        = c(dat_signif$freshInt_sexM_pvalue, dat_signif$freshInt_sexW_pvalue,
                                         dat_signif$pastInt_sexM_pvalue,  dat_signif$pastInt_sexW_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = gsub(intervention, pattern = "gender ", replacement = ""),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.",
                                                        "Fresh intervention\n(male)",
                                                        "Fresh intervention\n(female)",
                                                        "Pasteurized intervention\n(male)",
                                                        "Pasteurized intervention\n(female)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
(gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))

BMI

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_bmiLow_pvalue_correct < 0.1) | (freshInt_bmiHigh_pvalue_correct < 0.1) |
           (pastInt_bmiLow_pvalue_correct < 0.1) | (pastInt_bmiHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       qvalue        = c(dat_signif$freshInt_bmiLow_pvalue_correct, dat_signif$freshInt_bmiHigh_pvalue_correct,
                                         dat_signif$pastInt_bmiLow_pvalue_correct,  dat_signif$pastInt_bmiHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_bmiInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                                        "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_bmiInt_qvalue <- dat_plot_bmiInt %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_bmiInt_qvalue

Code
# ggsave("functional_speciesScreening_bmiInteractions_qvalues.png", width = 12, height = 3)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       pvalue        = c(dat_signif$freshInt_bmiLow_pvalue, dat_signif$freshInt_bmiHigh_pvalue,
                                         dat_signif$pastInt_bmiLow_pvalue,  dat_signif$pastInt_bmiHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = gsub(intervention, pattern = "low",  replacement = "[18, 25)"),
         intervention = gsub(intervention, pattern = "high", replacement = "[25, 31]"),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.",
                                                        "Fresh intervention\n(BMI [18, 25))",
                                                        "Fresh intervention\n(BMI [25, 31])",
                                                        "Pasteurized intervention\n(BMI [18, 25))",
                                                        "Pasteurized intervention\n(BMI [25, 31])")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
(gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))

fiber

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_fibLow_pvalue_correct < 0.1) | (freshInt_fibHigh_pvalue_correct < 0.1) |
           (pastInt_fibLow_pvalue_correct < 0.1) | (pastInt_fibHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(fiber low)", "Fresh intervention\n(fiber high)",
                                             "Pasteurized intervention\n(fiber low)", "Pasteurized intervention\n(fiber high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_fibLow_effect, dat_signif$freshInt_fibHigh_effect,
                                         dat_signif$pastInt_fibLow_effect,  dat_signif$pastInt_fibHigh_effect),
                       qvalue        = c(dat_signif$freshInt_fibLow_pvalue_correct, dat_signif$freshInt_fibHigh_pvalue_correct,
                                         dat_signif$pastInt_fibLow_pvalue_correct,  dat_signif$pastInt_fibHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_fibLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_fibInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(fiber low)", "Fresh intervention\n(fiber high)",
                                                        "Pasteurized intervention\n(fiber low)", "Pasteurized intervention\n(fiber high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
gg_fibInt_qvalue <- dat_plot_fibInt %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white", drop = FALSE) +
  facet_grid(rows = vars(phylum), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))
gg_fibInt_qvalue

Code
# ggsave("functional_speciesScreening_fibInteractions_qvalues.png", width = 12, height = 3)
Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(fiber low)", "Fresh intervention\n(fiber high)",
                                             "Pasteurized intervention\n(fiber low)", "Pasteurized intervention\n(fiber high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_fibLow_effect, dat_signif$freshInt_fibHigh_effect,
                                         dat_signif$pastInt_fibLow_effect,  dat_signif$pastInt_fibHigh_effect),
                       pvalue        = c(dat_signif$freshInt_fibLow_pvalue, dat_signif$freshInt_fibHigh_pvalue,
                                         dat_signif$pastInt_fibLow_pvalue,  dat_signif$pastInt_fibHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_fibLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         # intervention = gsub(intervention, pattern = "low",  replacement = "[18, 25)"),
         # intervention = gsub(intervention, pattern = "high", replacement = "[25, 31]"),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.",
                                                        "Fresh intervention\n(fiber low)",
                                                        "Fresh intervention\n(fiber high)",
                                                        "Pasteurized intervention\n(fiber low)",
                                                        "Pasteurized intervention\n(fiber high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"),
         pvalue_signif  = factor(pvalue_signif, levels = c("not significant",
                                                           "significant pos. effect (fresh intervention)",
                                                           "significant neg. effect (fresh intervention)",
                                                           "significant pos. effect (pasteurized intervention)",
                                                           "significant neg. effect (pasteurized intervention)",
                                                           "")))


# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot         <- dat_plot %>%
  mutate(species = factor(species, levels = rev(species_ordering)))

# plot
gg_heatmap <- dat_plot %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, col = text_highlight)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  scale_color_manual(values = c("gray80", "black"), guide = "none") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())

# align the two plots
(gg_tree + gg_heatmap) + patchwork::plot_layout(widths = c(0.2, 0.8))

Joint heatmap of all interaction effects

… of uncorrected p-values

Create the diversity interaction data

… same as for the above plot, but also including species that showed significances for any other interaction

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05) |
           (freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05) |
           (freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(div. low)", "Fresh intervention\n(div. high)",
                                             "Pasteurized intervention\n(div. low)", "Pasteurized intervention\n(div. high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_divLow_effect, dat_signif$freshInt_divHigh_effect,
                                         dat_signif$pastInt_divLow_effect,  dat_signif$pastInt_divHigh_effect),
                       pvalue        = c(dat_signif$freshInt_divLow_pvalue, dat_signif$freshInt_divHigh_pvalue,
                                         dat_signif$pastInt_divLow_pvalue,  dat_signif$pastInt_divHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_divLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_divIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(div. low)", "Fresh intervention\n(div. high)",
                                                        "Pasteurized intervention\n(div. low)", "Pasteurized intervention\n(div. high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
Create the age interaction data

… same as for the above plot, but also including species that showed significances for any other interaction

Code
# prepare the base dataset for the plot
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05) |
           (freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05) |
           (freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       pvalue        = c(dat_signif$freshInt_ageLow_pvalue, dat_signif$freshInt_ageHigh_pvalue,
                                         dat_signif$pastInt_ageLow_pvalue,  dat_signif$pastInt_ageHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_ageIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                                        "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the sex interaction data

… same as for the above plot, but also including species that showed significances for any other interaction

Code
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05) |
           (freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05) |
           (freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       pvalue        = c(dat_signif$freshInt_sexM_pvalue, dat_signif$freshInt_sexW_pvalue,
                                         dat_signif$pastInt_sexM_pvalue,  dat_signif$pastInt_sexW_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_sexIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                                        "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the BMI interaction data

… same as for the above plot, but also including species that showed significances for any other interaction

Code
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05) |
           (freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05) |
           (freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       pvalue        = c(dat_signif$freshInt_bmiLow_pvalue, dat_signif$freshInt_bmiHigh_pvalue,
                                         dat_signif$pastInt_bmiLow_pvalue,  dat_signif$pastInt_bmiHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_bmiIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                                        "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the fiber interaction data

… same as for the above plot, but also including species that showed significances for any other interaction

Code
dat_signif <- dat_results %>% 
  filter((freshInt_divLow_pvalue < 0.05) | (freshInt_divHigh_pvalue < 0.05) |
           (pastInt_divLow_pvalue < 0.05) | (pastInt_divHigh_pvalue < 0.05) |
           (freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05) |
           (freshInt_fibLow_pvalue < 0.05) | (freshInt_fibHigh_pvalue < 0.05) |
           (pastInt_fibLow_pvalue < 0.05) | (pastInt_fibHigh_pvalue < 0.05))
dat_plot <- data.frame(species       = rep(dat_signif$species,  times = 4),
                       baseline_medianRelAbd = rep(dat_signif$baseline_medianRelAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(fiber low)", "Fresh intervention\n(fiber high)",
                                             "Pasteurized intervention\n(fiber low)", "Pasteurized intervention\n(fiber high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_fibLow_effect, dat_signif$freshInt_fibHigh_effect,
                                         dat_signif$pastInt_fibLow_effect,  dat_signif$pastInt_fibHigh_effect),
                       pvalue        = c(dat_signif$freshInt_fibLow_pvalue, dat_signif$freshInt_fibHigh_pvalue,
                                         dat_signif$pastInt_fibLow_pvalue,  dat_signif$pastInt_fibHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_fibLow_effect, times = 4)) %>% 
  mutate(species       = gsub(pattern = "species:", replacement = "", x = species)) %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% distinct(species, .keep_all = TRUE), by = "species") %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         species       = gsub(pattern = "_", replacement = " ", x = species),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$species)), function(spec) {
  dat_spec <- dat_plot %>% filter(species == spec)
  dat_spec %>% 
    dplyr::bind_rows(data.frame(species       = spec,
                                intervention  = "Baseline median rel. abd.",
                                phylum        = dat_spec$phylum[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_spec$baseline_medianRelAbd < .0001 ~ "<0.0001%",
                                                          dat_spec$baseline_medianRelAbd < .001  ~ paste0(round(dat_spec$baseline_medianRelAbd, 4), "%"),
                                                          dat_spec$baseline_medianRelAbd < .01   ~ paste0(round(dat_spec$baseline_medianRelAbd, 3), "%"),
                                                          dat_spec$baseline_medianRelAbd < .1    ~ paste0(round(dat_spec$baseline_medianRelAbd, 2), "%"),
                                                          dat_spec$baseline_medianRelAbd < 1     ~ paste0(round(dat_spec$baseline_medianRelAbd, 1), "%"),
                                                          TRUE ~ paste0(round(dat_spec$baseline_medianRelAbd, 0), "%"))))
})
dat_plot_fibIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(species      = factor(species, levels = rev(unique(species))),
         intervention = factor(intervention, levels = c("Baseline median rel. abd.", "Fresh intervention\n(fiber low)", "Fresh intervention\n(fiber high)",
                                                        "Pasteurized intervention\n(fiber low)", "Pasteurized intervention\n(fiber high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the joint plot
Code
# create tree plot
tse_filtered <- tse[rowData(tse)$species %in% gsub(dat_plot_ageIntp$species, pattern = " ", replacement = "_"),]
gg_tree <- mia::taxonomyTree(tse_filtered) %>% ggtree()

# make sure the main heatmap has the same ordering as the tree
gg_tree_build    <- ggplot_build(gg_tree + geom_tiplab()) # add tiplab to retrieve species labels
species_ordering <- gg_tree_build$data[[3]] %>% arrange(desc(y)) %>% pull(label)
species_ordering <- species_ordering %>% 
  gsub(pattern = "species:", replacement = "") %>% 
  gsub(pattern = "_",        replacement = " ")
dat_plot_ageIntp <- dat_plot_ageIntp %>% mutate(species = factor(species, levels = rev(species_ordering)))
dat_plot_sexIntp <- dat_plot_sexIntp %>% mutate(species = factor(species, levels = rev(species_ordering)))
dat_plot_bmiIntp <- dat_plot_bmiIntp %>% mutate(species = factor(species, levels = rev(species_ordering)))

# plots
gg_heatmap_divInt <- dat_plot_divIntp %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank())
gg_heatmap_ageInt <- dat_plot_ageIntp %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank())
gg_heatmap_sexInt <- dat_plot_sexIntp %>% 
  filter(!grepl("Baseline", intervention)) %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank())
gg_heatmap_bmiInt <- dat_plot_bmiIntp %>% 
  filter(!grepl("Baseline", intervention)) %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank())
gg_heatmap_fibInt <- dat_plot_fibIntp %>% 
  filter(!grepl("Baseline", intervention)) %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank())

# align the two plots
(gg_tree + gg_heatmap_divInt + gg_heatmap_ageInt + gg_heatmap_sexInt + gg_heatmap_bmiInt + gg_heatmap_fibInt) +
  patchwork::plot_layout(widths = c(.2, .3, .25, .25, .25, .25), nrow = 1, guides = "collect")

Code
# ggsave("functional_speciesScreening_allInteractions_pvalues.png", width = 40, height = 30)

Joint plot of condensed information

Focus on the four biggest phylum groups: Firmicutes, Bacteroidetes, Actinobacteria and Proteobacteria

Code
# data preparation
plot_dat_wide <- dat_results %>% 
  dplyr::left_join(dat %>% select(species, phylum) %>% mutate(species = paste0("species:", species)) %>% distinct(),
                   by = "species") %>% 
  filter(phylum %in% c("Firmicutes", "Bacteroidetes", "Actinobacteria", "Proteobacteria")) %>% 
  mutate(phylum = droplevels(phylum)) %>% 
  group_by(phylum) %>% 
  summarize(n_species                    = n(),
            n_freshSignifEffects_overall = sum(freshInt_pvalue         < 0.05),
            n_freshSignifEffects_divLow  = sum(freshInt_divLow_pvalue  < 0.05),
            n_freshSignifEffects_divHigh = sum(freshInt_divHigh_pvalue < 0.05),
            n_freshSignifEffects_ageLow  = sum(freshInt_ageLow_pvalue  < 0.05),
            n_freshSignifEffects_ageHigh = sum(freshInt_ageHigh_pvalue < 0.05),
            n_freshSignifEffects_sexM    = sum(freshInt_sexM_pvalue    < 0.05),
            n_freshSignifEffects_sexW    = sum(freshInt_sexW_pvalue    < 0.05),
            n_freshSignifEffects_bmiLow  = sum(freshInt_bmiLow_pvalue  < 0.05),
            n_freshSignifEffects_bmiHigh = sum(freshInt_bmiHigh_pvalue < 0.05),
            n_freshSignifEffects_fibLow  = sum(freshInt_fibLow_pvalue  < 0.05),
            n_freshSignifEffects_fibHigh = sum(freshInt_fibHigh_pvalue < 0.05),
            n_pastSignifEffects_overall  = sum(pastInt_pvalue          < 0.05),
            n_pastSignifEffects_divLow   = sum(pastInt_divLow_pvalue   < 0.05),
            n_pastSignifEffects_divHigh  = sum(pastInt_divHigh_pvalue  < 0.05),
            n_pastSignifEffects_ageLow   = sum(pastInt_ageLow_pvalue   < 0.05),
            n_pastSignifEffects_ageHigh  = sum(pastInt_ageHigh_pvalue  < 0.05),
            n_pastSignifEffects_sexM     = sum(pastInt_sexM_pvalue     < 0.05),
            n_pastSignifEffects_sexW     = sum(pastInt_sexW_pvalue     < 0.05),
            n_pastSignifEffects_bmiLow   = sum(pastInt_bmiLow_pvalue   < 0.05),
            n_pastSignifEffects_bmiHigh  = sum(pastInt_bmiHigh_pvalue  < 0.05),
            n_pastSignifEffects_fibLow   = sum(pastInt_fibLow_pvalue   < 0.05),
            n_pastSignifEffects_fibHigh  = sum(pastInt_fibHigh_pvalue  < 0.05))


# reshape dataset to long format
plot_dat_long <- data.frame(phylum               = rep(plot_dat_wide$phylum, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            n_species            = rep(plot_dat_wide$n_species, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            effect_group         = rep(c("overall", "divInt", "divInt", "ageInt", "ageInt", "sexInt", "sexInt", "bmiInt", "bmiInt", "fibInt", "fibInt"),
                                                       each = nrow(plot_dat_wide)),
                            effect_type          = rep(c("overall", "divLow", "divHigh", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh", "fibLow", "fibHigh"),
                                                       each = nrow(plot_dat_wide)),
                            n_freshSignifEffects = c(plot_dat_wide$n_freshSignifEffects_overall,
                                                     plot_dat_wide$n_freshSignifEffects_divLow,
                                                     plot_dat_wide$n_freshSignifEffects_divHigh,
                                                     plot_dat_wide$n_freshSignifEffects_ageLow,
                                                     plot_dat_wide$n_freshSignifEffects_ageHigh,
                                                     plot_dat_wide$n_freshSignifEffects_sexM,
                                                     plot_dat_wide$n_freshSignifEffects_sexW,
                                                     plot_dat_wide$n_freshSignifEffects_bmiLow,
                                                     plot_dat_wide$n_freshSignifEffects_bmiHigh,
                                                     plot_dat_wide$n_freshSignifEffects_fibLow,
                                                     plot_dat_wide$n_freshSignifEffects_fibHigh),
                            n_pastSignifEffects  = c(plot_dat_wide$n_pastSignifEffects_overall,
                                                     plot_dat_wide$n_pastSignifEffects_divLow,
                                                     plot_dat_wide$n_pastSignifEffects_divHigh,
                                                     plot_dat_wide$n_pastSignifEffects_ageLow,
                                                     plot_dat_wide$n_pastSignifEffects_ageHigh,
                                                     plot_dat_wide$n_pastSignifEffects_sexM,
                                                     plot_dat_wide$n_pastSignifEffects_sexW,
                                                     plot_dat_wide$n_pastSignifEffects_bmiLow,
                                                     plot_dat_wide$n_pastSignifEffects_bmiHigh,
                                                     plot_dat_wide$n_pastSignifEffects_fibLow,
                                                     plot_dat_wide$n_pastSignifEffects_fibHigh)) %>% 
  mutate(phylum_label = paste0(phylum, "\n", n_species, " species"),
         effect_group = factor(effect_group, levels = c("overall", "divInt", "ageInt", "sexInt", "bmiInt", "fibInt")),
         effect_type  = factor(effect_type, levels = rev(c("overall", "divLow", "divHigh", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh", "fibLow", "fibHigh"))))

# make the dataset even longer
plot_dat_longer <- data.frame(phylum       = rep(plot_dat_long$phylum,       times = 2),
                              n_species    = rep(plot_dat_long$n_species,    times = 2),
                              phylum_label = rep(plot_dat_long$phylum_label, times = 2),
                              effect_group = rep(plot_dat_long$effect_group, times = 2),
                              effect_type  = rep(plot_dat_long$effect_type,  times = 2),
                              intervention = rep(c("Fresh", "Past."), each = nrow(plot_dat_long)),
                              parameter    = "n_signifEffects",
                              value        = c(plot_dat_long$n_freshSignifEffects,
                                               plot_dat_long$n_pastSignifEffects)) %>% 
  group_by(phylum, parameter, effect_group, intervention) %>% 
  mutate(alpha_value = case_when(effect_group == "overall"  ~ 0.8,
                                 value == min(value)        ~ 0,
                                 TRUE                       ~ 1)) %>% 
  ungroup() %>% 
  mutate(fill_tiles = case_when(effect_type == "overall" ~ "no",
                                TRUE ~ phylum),
         alpha_tiles = case_when(effect_type %in% c("ageLow", "sexM", "bmiLow") ~ 0.05,
                                 TRUE ~ 0.15))
plot_dat_longer <- plot_dat_longer %>% mutate(phylum_label = factor(phylum_label, levels = plot_dat_longer %>% arrange(desc(n_species)) %>% 
                                                                      pull(phylum_label) %>% unique()))


# create a color vector for color coding the interaction terms
col_vector <- RColorBrewer::brewer.pal(4, name = "Dark2")

# plot
gg_allInt_table <- ggplot(plot_dat_longer, aes(x = intervention, y = effect_type)) +
  geom_tile(aes(fill = fill_tiles, alpha = alpha_tiles)) +
  geom_text(aes(label = value, alpha = alpha_value)) +
  facet_grid(effect_group ~ phylum_label, scales = "free") +
  scale_fill_manual(values = c("no"             = "white",
                               "Firmicutes"     = col_vector[1],
                               "Bacteroidetes"  = col_vector[2],
                               "Actinobacteria" = col_vector[3],
                               "Proteobacteria" = col_vector[4])) +
  theme(axis.title      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "none",
        strip.background.x = element_rect(fill = "gray97", color = "gray97"),
        strip.text.y    = element_blank())
gg_allInt_table

Code
# ggsave("functional_speciesScreening_allInteractions_table.png", width = 7, height = 5)

Main joint figures for the publication

Create final joint plot

Code
# 1) plot of overall effects
gg_overall <- dat_plot_overall %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", "")),
         effect_label  = gsub(effect_label, pattern = "\n", replacement = " ")) %>% 
  ggplot(aes(x = intervention, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, size = text_size)) +
  scale_fill_manual("q-value", values = c("not significant"     = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  scale_size(range = c(3.5, 4), guide = "none") +
  theme_minimal(base_size = 14.5) +
  theme(plot.title        = element_text(hjust = 0.5),
        axis.title        = element_blank(),
        panel.grid        = element_blank(),
        strip.background  = element_rect(fill = "gray97", color = "gray97"),
        legend.position   = "none")

# 1.5) create the legend separately, also to ensure that all color categories are plotted
category_labels <- c("not significant","pos. effect (fresh)","neg. effect (fresh)",
                                     "pos. effect (past.)","neg. effect (past.)")
gg_base    <- data.frame(x       = rnorm(n = 5),
                         y       = rnorm(n = 5),
                         col_var = factor(category_labels, levels = category_labels)) %>% 
  ggplot(aes(x = x, y = y, fill = col_var)) +
  geom_tile() +
  scale_fill_manual("q-value", values = c("not significant"     = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  theme_minimal(base_size = 15) +
  theme(legend.position = "bottom")
gg_legend  <- ggpubr::get_legend(gg_base) %>% ggpubr::as_ggplot()


# 2) plot of diversity-specific effects
gg_div <- dat_plot_divInt %>% 
  filter(intervention != "Baseline median rel. abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "div. low",  replacement = "div. [2.8, 3.85)"),
         intervention2 = gsub(intervention2, pattern = "div. high", replacement = "div. [3.85, 4.4)"),
         intervention3 = gsub(intervention,  pattern = "\n\\(div. low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(div. high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  guides(fill = guide_legend(nrow = 1)) +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("diversity-stratified effects") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position  = "none")

# 3) plot of age-specific effects
gg_age <- dat_plot_ageInt %>% 
  filter(intervention != "Baseline median rel. abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "age low",  replacement = "age 21-49"),
         intervention2 = gsub(intervention2, pattern = "age high", replacement = "age 50-69"),
         intervention3 = gsub(intervention,  pattern = "\n\\(age low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(age high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  guides(fill = guide_legend(nrow = 1)) +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("age-stratified effects") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position  = "none")

# 4) plot of gender-specific effects
gg_sex <- dat_plot_sexInt %>% 
  filter(intervention != "Baseline median rel. abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "gender ",  replacement = ""),
         intervention2 = factor(intervention2, levels = c("male", "female")),
         intervention3 = gsub(intervention,  pattern = "\n\\(gender male\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(gender female\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("Gender-specific effects") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 5) plot of BMI-specific effects
gg_bmi <- dat_plot_bmiInt %>% 
  filter(intervention != "Baseline median rel. abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "BMI low",  replacement = "BMI [18, 25)"),
         intervention2 = gsub(intervention2, pattern = "BMI high", replacement = "BMI [25, 31]"),
         intervention3 = gsub(intervention,  pattern = "\n\\(BMI low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(BMI high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("BMI-specific effects") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 6) plot of fiber-specific effects
gg_fib <- dat_plot_fibInt %>% 
  filter(intervention != "Baseline median rel. abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "fiber low",  replacement = "fiber (8, 30)"),
         intervention2 = gsub(intervention2, pattern = "fiber high", replacement = "fiber [30, 58)"),
         intervention3 = gsub(intervention,  pattern = "\n\\(fiber low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(fiber high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = species)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("fiber-specific effects") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 7) correlation matrix between selected species and SCFA-related genes
# Average correlations between a set of relevant species and all the genes that are related to a specific SCFA.
# Species are sorted by their average correlation with Butyryl-CoA genes.
species_vector <- c("Anaerostipes_hadrus",
                    "GGB9631_SGB15085",
                    "Lacticaseibacillus_paracasei",
                    "GGB9603_SGB15035",
                    "GGB9561_SGB14972",
                    "Phocaeicola_dorei",
                    "GGB9512_SGB14909",
                    "Blautia_faecis",
                    "Blautia_obeum")
plot_dat <- plot_dat %>% 
  filter(species %in% species_vector) %>% 
  mutate(species = gsub(pattern = "\\_", replacement = " ", species)) %>% 
  group_by(species, SCFA) %>% 
  summarize(phylum    = first(phylum),
            mean_corr = mean(corr))
`summarise()` has grouped output by 'species'. You can override using the
`.groups` argument.
Code
# sort species according to the correlation with the Butyryl-CoA
species_sort <- plot_dat %>% 
  filter(SCFA == "Butyryl-CoA") %>% 
  group_by(species) %>% 
  summarize(avg_corr = mean(mean_corr)) %>% 
  arrange(desc(avg_corr)) %>% 
  pull(species)
plot_dat <- plot_dat %>% 
  mutate(species    = factor(species, levels = rev(species_sort)),
         corr_label = round(mean_corr, 2))
# create correlation matrix plot
gg_corrMatrix <- plot_dat %>% 
  mutate(SCFA = case_when(SCFA == "succinate"  ~ "Succinate",
                          SCFA == "propionate" ~ "Propionate",
                          TRUE                 ~ SCFA)) %>% 
  ggplot(aes(y = species, x = SCFA)) +
  geom_tile(aes(fill = mean_corr)) +
  geom_text(aes(label = corr_label)) +
  scale_fill_gradient2("average correlation with genes", low = "#A8896C", high = "#84C2A9", mid = "white") +
  theme_minimal(base_size = 14) +
  theme(axis.title      = element_blank(),
        axis.ticks      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "bottom",
        plot.background = element_rect(fill = "white", color = "white"))

# 8) small plot just for adding some in between titles to the joint plot
text_dat <- data.frame(x = 0.5,
                       y = 0.6,
                       label = c("Overall effects on species' abundance", "Stratified effects"))
gg_header_base <- ggplot() +
  geom_hline(yintercept = 0) +
  xlim(c(0,1)) + ylim(c(0,1)) +
  theme_minimal(base_size = 15) +
  theme(panel.grid        = element_blank(),
        plot.tag.location = "plot",
        plot.tag          = element_text(size = 18, margin = margin(t = -5)),
        axis.text         = element_blank(),
        axis.title        = element_blank(),
        plot.margin       = margin(t = 0, b = 0, l = 0, r = 0),
        legend.position = "none")
gg_headerA <- gg_header_base +
  geom_text(data = text_dat %>% dplyr::slice(1), aes(x = x, y = y, label = label), size = 5) +
  labs(tag = "(A)")
gg_headerB <- gg_header_base +
  geom_text(data = text_dat %>% dplyr::slice(2), aes(x = x, y = y, label = label), size = 5) +
  labs(tag = "(B)")

# 9) joint plot
(gg_headerA + gg_overall + gg_legend + gg_headerB + gg_div) +
  patchwork::plot_layout(ncol = 1, heights = c(.15,.9,.7,.15,.5))

Code
# ggsave("Figure4_speciesScreening.png", width = 13, height = 6)
Further interaction effects
Code
(gg_age + gg_sex + gg_bmi + gg_fib) +
  patchwork::plot_layout(ncol = 1, heights = c(1,.2,.6,.45))

Code
# ggsave("FigureS11_speciesScreening_stratifications.png", width = 13, height = 8)
Correlation matrix
Code
gg_corrMatrix

Code
# ggsave("FigureS12_corrMatrix.png", width = 8, height = 5)
Plots for supplements
Code
# 5) plot of the overview table of the uncorrected p-value tendencies
gg_table <- plot_dat_longer %>% 
  mutate(effect_type = case_when(effect_type == "ageLow"  ~ "age 21-49",
                                 effect_type == "ageHigh" ~ "age 50-69",
                                 effect_type == "sexM"    ~ "male",
                                 effect_type == "sexW"    ~ "female",
                                 effect_type == "bmiLow"  ~ "BMI [18, 25)",
                                 effect_type == "bmiHigh" ~ "BMI [25, 31]",
                                 TRUE                     ~ effect_type),
         effect_type = factor(effect_type, levels = c("overall",
                                                      "age 50-69", "age 21-49",
                                                      "female", "male",
                                                      "BMI [25, 31]", "BMI [18, 25)"))) %>% 
  ggplot(aes(x = intervention, y = effect_type)) +
  geom_tile(aes(fill = fill_tiles, alpha = alpha_tiles)) +
  geom_text(aes(label = value, alpha = alpha_value)) +
  facet_grid(effect_group ~ phylum_label, scales = "free") +
  scale_fill_manual(values = c("no"             = "white",
                               "Firmicutes"     = col_vector[1],
                               "Bacteroidetes"  = col_vector[2],
                               "Actinobacteria" = col_vector[3],
                               "Proteobacteria" = col_vector[4])) +
  theme_minimal(base_size = 15) +
  theme(plot.title      = element_text(hjust = 0.5),
        axis.title      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "none",
        strip.background.x = element_rect(fill = "gray97", color = "gray97"),
        strip.text.y    = element_blank())

gg_table +
  theme(panel.grid.minor = element_blank(),
        plot.background  = element_rect(fill = "white", color = "white"))

Code
ggsave("FigureSxxx_speciesScreening_trends.png", width = 9, height = 4.5)
Code
gg1 <- gg_pvalues + theme(legend.position = "none")
gg2 <- gg_qvalues +
  guides(fill = guide_legend(nrow=5)) +
  theme(legend.position = "bottom")
gg3 <- gg_allInt_table

layout <- "
ABC
ABD
ABE
ABE
ABE
"

(gg1 + gg2 + ggplot() + gg3) +
  patchwork::plot_layout(design  = layout,
                         widths  = c(.2,.5,.3))

Code
# ggsave("functional_speciesScreening_allPlots.png", width = 20, height = 13)